# ==============================================================================
# DESCRIPTION ----
# ==============================================================================

# This code explores descriptive statistics from the cleaned amakudari.RData
# and outputs tables and figures. 

# ==============================================================================
# LIBRARIES AND IMPORT ----
# ==============================================================================

# Options
options(scipen = 100)

# Libraries
library(tidyverse)
library(formattable)
library(kableExtra)

# Data import
amakudari <- read_csv("data/amakudata.csv")
load("data/nikkei_financials.Rdata")

# ==============================================================================
# EXAMPLE DATASET ----
# ==============================================================================
set.seed(999)
# Create example of data 
amakudari_example <- amakudari %>%
  select(date_ret, agency, ministry_short, firm_dest_en, firm_type1_en, tse_code) %>%
  filter(firm_dest_en != "99" & agency != "99", agency != "Police") %>%
  arrange(date_ret) %>%
  mutate(date_ret = as.character(date_ret)) %>%
  slice_sample(n = 50) %>%
  mutate(firm_dest_en = str_replace(firm_dest_en, "&", "and"))
  
# Export table 
amakudari_example %>%
  kable(format = 'latex', booktabs = TRUE, 
        caption = "Amakudari dataset example",
        label = " amakudari_example") %>%
  kable_styling(latex_options = c("striped", "HOLD_position", "scale_down")) %>%
  save_kable("tables/a1_amakudari_example.tex")

# ==============================================================================
# TABLE OF AMAKUDARI DESTINATIONS BY INDUSTRY ----
# ==============================================================================
# Create list of all stock corporations in Nikkei NEEDS by industry
industry_all <- nikkei_time %>% group_by(nikkei_code) %>% slice(1)
industry_all <- industry_all %>%
  group_by(industry) %>%
  summarise(N = n(), percent_firms = N/nrow(industry_all)) %>%
  arrange(-N) %>%
  select(-N)

# Count amakudari by industry
amakudari_industry <- nikkei_time %>%
  group_by(industry) %>%
  summarise(amakudari = sum(amakudari_n)) %>%
  arrange(-amakudari)

# Percentage of amakudari by industry
percent_amakudari <- nikkei_time %>% filter(amakudari_binary != 0)
percent_amakudari <- percent_amakudari %>%
  group_by(industry) %>%
  summarise(percent_amakudari = n()/nrow(percent_amakudari))

# Combine amakudari and industry lists
amakudari_industry <- left_join(amakudari_industry, industry_all, by = "industry")
amakudari_industry <- left_join(amakudari_industry, percent_amakudari, by = "industry")

# Clean final dataset for plotting
amak_indust_tbl <- amakudari_industry %>%
  mutate(
    percent_amakudari = round(percent_amakudari*100, 1),
    percent_firms = round(percent_firms*100, 1),
    Difference = percent_amakudari - percent_firms) %>%
  rename(Industry = industry,
         `Count amakudari` = amakudari,
         `Percent of firms` = percent_firms,
         `Percent of amakudari` = percent_amakudari
         ) %>%
  arrange(desc(`Count amakudari`), desc(Difference)) %>%
  filter(Industry != "Nonclassifiable")

# FIGURE A2: Shortened table with visualizations
amak_indust_tbl %>% slice(1:10) %>%
  mutate(
    `Count amakudari` = color_bar("deepskyblue")(`Count amakudari`),
    Difference = color_tile("gainsboro", "seagreen")(Difference)
  ) %>%
  select(Industry, everything()) %>%
  kable("html", escape = F, align = c("l", "l", "c", "c", "c")) %>%
  kable_styling("condensed", full_width = F, html_font = "Times New Roman") %>%
  column_spec(1, color = "black") %>%
  column_spec(2, color = "white", bold = T) %>%
  column_spec(3, color = "black") %>%
  column_spec(4, color = "black") %>%
  column_spec(5, color = "white", bold = T, width = "3cm") %>%
  save_kable("figures/a2_amakudari_industry_short.png", zoom = 5)

# TABLE A2: Output to LaTeX: full table
amak_indust_tbl %>%
  kable(format = 'latex', booktabs = TRUE,
        caption = "Amakudari industry destinations vs. overall economy", 
        label = "amakudari_firms_full",
        align = c("l", "c", "c", "c", "c")) %>%
  kable_styling(
    latex_options = c("striped", "HOLD_position", "scale_down")) %>%
  footnote(
    general = "``Percent economy'' calculation is the total number of firms in each industry divided by all firms in the Nikkei NEEDS database.",
    general_title = "Note: ",
    footnote_as_chunk = T)  %>%
  save_kable("tables/a2_amakudari_industry_full.tex")

# ==============================================================================
# TABLE OF BUREAUCRAT DEMOGRAPHICS ----
# ==============================================================================
##### Age #####
# Mean
age_mean <- amakudari %>%
  group_by(firm_type4_en, firm_type2_en) %>%
  summarise(Mean = round(mean(age_ret), 0))

# Age: quantiles
age_quantiles <- amakudari %>%
  group_by(firm_type4_en, firm_type2_en) %>%
  do(data.frame(t(quantile(.$age_ret, probs = c(0.05, 0.25, 0.50, 0.75, 0.95))))) %>%
  rename("5" = X5., "25" = X25., "Median" = X50.,
         "75" = X75., "95" = X95.) %>%
  mutate_if(is.numeric, round, 0)

# Combine mean age and quantiles
age <- left_join(age_mean, age_quantiles)

# Clean dataframes
age <- age %>%
  ungroup(firm_type4_en, firm_type2_en) %>%
  mutate(
    firm_type4_en = ifelse(is.na(firm_type4_en), "Missing", firm_type4_en), # Removed once gone
    firm_type4_en = ifelse(firm_type4_en == "99", "Unclassified", firm_type4_en), # Removed once gone
    firm_type2_en = ifelse(grepl("na", firm_type2_en), "", firm_type2_en), # Removed once gone
    firm_type2_en = ifelse(grepl("99", firm_type2_en), "", firm_type2_en)) %>%
  rename("Firm type" = firm_type4_en, "Firm sub-type" = firm_type2_en) # Rename columns

# TABLE A3: Output to LaTeX table
stargazer::stargazer(age, summary = FALSE, rownames = FALSE,
          out = "tables/a3_age.tex", 
          title= "Age of retirement: mean and quantiles (all years)",
          label = "tab: age",
          column.sep.width = "0.5cm")

# Footnote 19 figures ----------------------------------------------------------
# Flow from MOF to banks
banks <- amakudari %>%
  filter(firm_type3_en == "For-profit",
         str_detect(firm_dest, "銀行")) %>%
  group_by(ministry_short, agency) %>%
  summarise(total_ministry = n()) %>%
  mutate(Percentage = total_ministry/sum(total_ministry))

# MOF to banks
mof_banks <- amakudari %>% 
  filter(ministry_short == "MOF", 
         firm_type3_en == "For-profit",
         str_detect(firm_dest, "銀行"))
length(mof_banks$ministry) # MOF to private banks

# Flows from MOF to Shinkin banks
mof_shinkin_banks <- amakudari %>%
  filter(ministry_short == "MOF", firm_type1_en == "Shinkin bank") %>%
  mutate(rfb = str_detect(post_former, "財務局")) %>%
  select(date_ret, post_former, ministry_short, agency, rfb, firm_dest, post_dest,
         indust_dest, industry, firm_type1_en)
length(mof_shinkin_banks$rfb) # Total MOF to shinkin bank movements
sum(mof_shinkin_banks$rfb == "TRUE") # Regional finance bureaus to shinkin banks

# Flows from FSA to Shinkin banks
fsa_shinkin_banks <- amakudari %>%
  filter(firm_type1_en == "Shinkin bank") %>%
  mutate(fsa = agency == "Financial Services Agency") %>%
  select(date_ret, post_former, ministry_short, agency, fsa, firm_dest, post_dest,
         indust_dest, industry, firm_type1_en)
sum(fsa_shinkin_banks$fsa == "TRUE") # Total FSA to shinkin banks

# Text page 15 -----------------------------------------------------------------
top_public <- amakudari %>%
  mutate(vm = if_else(
    position_level == "Vice-Minister" | 
    position_level == "Assistant Vice-Minister", 
    1, 0, missing = 0)) %>%
  filter(
    firm_type2_en == "Public Interest",
    firm_dest_en != "99") %>% # Remove unclassified firms (or becomes largest total)
  group_by(firm_dest_en) %>%
  summarise(total = n(), total_vm = sum(vm)) %>%
  arrange(-total) %>%
  slice(1:10)
